
// Create figures E1, E2 and F1 based on driving data

/* =============================================================================
	STEP 1: Load driving data
============================================================================= */
use "${dataout}DrivingDataset.dta", clear

/* =============================================================================
	STEP 2: Run regressions to create residualised values for treatment variables
============================================================================= */

/* First, run regression using LHS variables as outcomes and save residuals */
local xvarlist toll_fam_mean ptl_fam_km_mean

foreach x in `xvarlist' {
	* In case this part of the code is re-ran, drop variables that are created
	capt drop res_`x'
	capt drop resM_`x'
	* Same controls as in "Table5.do", column (3)
	reghdfe `x' ///
			$householdD $employmentD $ageD $distanceD $timeD $publictimeD $publicqualityD ///	
			$carvarD ///
			year_* month_* $selectionD ///
			if    year_2005 == 0 ///
				& year_2006 == 0 ///
				& year_2007 == 0 ///
				& year_2008 == 0 ///
				& year_2009 == 0 ///
				& year_2010 == 0 ///
				& year_2011 == 0 ///
				& year_2012 == 0 ///
				& electric == 0 ///
				& weight > 0.5 ///
				& lnkm > 0 ///
			, abs($FED $incomeD $educationD) ///
			vce(cluster $FED) residuals(res_`x') 
	* We are only interested in the estimation sample --> drop remaining observations		
	keep if e(sample) == 1		
	qui sum `x'  if e(sample)==1
	* Saving residuals where the average value of the outcome is added back
	gen resM_`x'=  res_`x' + `r(mean)'
}

* Labeling variables
label variable km_dag "Driving, km per day"
label var lnkm "Log driving, km per day"
label variable toll_fam_mean "Road toll (NOK)" 
label variable ptl_fam_km_mean "Public transport lane (km)"

/* =============================================================================
	STEP 3: Create Figure F1
============================================================================= */

* Plotting population mean added residuals of toll and bus lane variables
local xvarlist toll_fam_mean ptl_fam_km_mean
foreach x in `xvarlist' {
	local labx: variable label `x'

	sum resM_`x' , det
	histogram resM_`x'   if resM_`x'   >`r(p1)'  & resM_`x'  <`r(p99)' ///
	, percent scheme(s2mono) graphregion(color(white))  xtitle("`labx'") scale(1.3) ///
	color(gs10)  lcolor(gs5) name(figureF1_`x', replace)
	graph export     "${figures}FigureF1_`x'.png" , replace
	graph export     "${figures}FigureF1_`x'.pdf" , replace
	graph save       "${figures}FigureF1_`x'.gph" , replace
}

/* =============================================================================
=== Step 4: Create Figure E.1 based on data where e(sample) == 1 (see line 39)
============================================================================= */

histogram km_dag ///
, percent scheme(s2mono) graphregion(color(white))  xtitle("Driving per car per day (km)") scale(1.3) ///
color(gs10)  lcolor(gs5) name(figureE1a, replace)
graph export     "${figures}FigureE1a.png" , replace
graph export     "${figures}FigureE1a.pdf" , replace
graph save       "${figures}FigureE1a.gph" , replace

histogram lnkm   ///
, percent scheme(s2mono) graphregion(color(white))  xtitle("Log driving per car per day (km)") scale(1.3) ///
color(gs10)  lcolor(gs5) name(figureE1b, replace)
graph export     "${figures}FigureE1b.png" , replace
graph export     "${figures}FigureE1b.pdf" , replace
graph save       "${figures}FigureE1b.gph" , replace

*===============================================================================
 // Step 5: Create Figure E.2a
*===============================================================================

// Creating intervals for road tolls 
sum toll_fam_mean, det
capt drop toll_fam_mean_int
gen toll_fam_mean_int = .
replace toll_fam_mean_int = 0 if toll_fam_mean == 0
local count=0
forvalues i=0(5)45 {
	local count = `count' + 5
	replace toll_fam_mean_int=`count' if toll_fam_mean>`i' & toll_fam_mean<=`count'
}
replace toll_fam_mean_int=50 if toll_fam_mean>=50 & toll_fam_mean!=.

/* Which values to plot per bin */
bysort toll_fam_mean_int: egen toll_bin=mean(toll_fam_mean)
bysort toll_fam_mean_int: egen toll_km_bin=mean(km_dag)
bysort toll_fam_mean_int: egen toll_lnkm_bin=mean(lnkm)
bysort toll_fam_mean_int: egen toll_bin_count=count(familie_id)

/* We only need to draw one circle for each bin */
bysort toll_fam_mean_int: replace toll_lnkm_bin = . if _n != 1

// Re-run the equation from lfit to get access to clustered estimates
capt gen helpvar=1
reghdfe lnkm toll_fam_mean , absorb(helpvar)  vce(cluster grkrets_num grk_bed1_num grk_bed2_num) 
eststo
drop helpvar 

local coeff _b[toll_fam_mean]
local fmtcoeff : dis %7.5f `coeff'
dis `fmtcoeff'

local se _se[toll_fam_mean]
local fmtse : dis %7.6f `se'
dis `fmtse'

twoway ///
	(scatter toll_lnkm_bin toll_fam_mean_int [w=toll_bin_count] ///
		if toll_fam_mean_int <= 50, msymbol(circle_hollow) mcolor(navy) ) ///
	(lfit lnkm toll_fam_mean ///
		if toll_fam_mean <= 50 , lcolor(navy) estopts(beta) ) ///
	, ytitle("Log driving, km per day per car") ylab(3(0.25)4)    ///
	xtitle("Road toll (NOK)") xlab(0(5)50) ///
	graphregion(color(white))  yscale(titlegap(1.5)) xscale(titlegap(1.5))  ///
	legend(off) scale(1.25) name(figureE2a, replace) ///
	text(3.06 44 "Coeff: `fmtcoeff'")  ///
	text(3.007 46  "(`fmtse')")  

graph export     "${figures}FigureE2a.png" , replace
graph export     "${figures}FigureE2a.pdf" , replace
graph save       "${figures}FigureE2a.gph" , replace

*===============================================================================
 // Step 6: Create Figure E.2b
*===============================================================================

//intervals 
sum ptl_fam_km_mean, det
capt drop ptl_fam_km_mean_int
gen ptl_fam_km_mean_int=.
replace ptl_fam_km_mean_int=0 if ptl_fam_km_mean==0
local count=0
dis `count'
forvalues i=0(0.5)4.5 {
local count = `count' + 0.5
dis `count'
replace ptl_fam_km_mean_int=`count' if ptl_fam_km_mean>`i' & ptl_fam_km_mean<=`count'
}
replace ptl_fam_km_mean_int=5 if ptl_fam_km_mean>=5 & ptl_fam_km_mean!=.
tab ptl_fam_km_mean_int

sum ptl_fam_km_mean if ptl_fam_km_mean_int==0

bysort ptl_fam_km_mean_int: egen ptl_bin=mean(ptl_fam_km_mean)
bysort ptl_fam_km_mean_int: egen km_ptl_bin=mean(km_dag)
bysort ptl_fam_km_mean_int: egen lnkm_ptl_bin=mean(lnkm)
bysort ptl_fam_km_mean_int: egen ptl_bin_count=count(familie_id)

bysort ptl_fam_km_mean_int: replace lnkm_ptl_bin = . if _n != 1

// Add equation from lfit
capt gen helpvar=1
reghdfe lnkm ptl_fam_km_mean , absorb(helpvar)  vce(cluster grkrets_num grk_bed1_num grk_bed2_num) 
eststo
drop helpvar 


local coeff _b[ptl_fam_km_mean]
local fmtcoeff : dis %7.5f `coeff'
dis `fmtcoeff'

local se _se[ptl_fam_km_mean]
local fmtse : dis %7.6f `se'
dis `fmtse'

twoway ///
(scatter lnkm_ptl_bin ptl_fam_km_mean_int [w=ptl_bin_count] if ptl_fam_km_mean_int<=5, msymbol(circle_hollow) mcolor(navy) ) ///
(lfit lnkm ptl_fam_km_mean if ptl_fam_km_mean<=5 , lcolor(navy) estopts(beta) ) ///
, ytitle("Log driving, km per day per car") ylab(3(0.25)4)    ///
xtitle("Public transport lane (km)") xlab(0(1)5) ///
graphregion(color(white))  yscale(titlegap(1.5)) xscale(titlegap(1.5))  ///
legend(off) scale(1.25) name(figureE2b, replace) ///
text(3.06   4.4 "Coeff: `fmtcoeff'")  ///
text(3.007 4.6  "(`fmtse')")    

graph export     "${figures}FigureE2b.png" , replace
graph export     "${figures}FigureE2b.pdf" , replace
graph save       "${figures}FigureE2b.gph" , replace
